Group 6: SIGMA mbha0014 Mayunk Bharadwaj pgre0007 Priscila Grecov
Presentation: TUE 4-6.00PM - order 6 - question Mitch
Our question:
We need only the sensors that measure relative humidity, i.e. those with sensor_id = 5b and sensor_id = 5b.EPA-1h.
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
# Distinct sensor identifiers present in the relative-humidity readings.
unique(sensor_readingsRH[["sensor_id"]])
[1] "5b.EPA-1h" "5b"
# Distinct months covered by the readings, in order of first appearance.
unique(sensor_readingsRH[["month"]])
[1] Nov Dec Jan Sep Feb Aug Mar Apr May Jul Jun Oct
Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
# Count missing values per column. is.na() on a data frame yields a logical
# matrix, so colSums() tallies the NAs directly and avoids apply()'s coercion
# of the whole data frame to a single-type matrix.
colSums(is.na(sensor_readingsRH))
id site_id sensor_id value local_time type units date timeStamp hour month hourMin
0 0 0 4 0 0 0 0 0 0 0 0
# Distinct site identifiers present in the readings.
unique(sensor_readingsRH[["site_id"]])
[1] "arc1045" "arc1046" "arc1047" "arc1048" "arc1050"
# Distinct dates covered by the readings, in order of first appearance.
unique(sensor_readingsRH[["date"]])
[1] "2019-11-15" "2019-11-16" "2019-11-18" "2019-11-19" "2019-11-20" "2019-11-17" "2019-11-21" "2019-11-22" "2019-11-23" "2019-11-24"
[11] "2019-11-25" "2019-11-26" "2019-11-27" "2019-11-28" "2019-11-29" "2019-11-30" "2019-12-01" "2019-12-03" "2019-12-02" "2019-12-04"
[21] "2019-12-06" "2019-12-07" "2019-12-05" "2019-12-08" "2019-12-09" "2019-12-10" "2019-12-11" "2019-12-12" "2019-12-13" "2019-12-14"
[31] "2019-12-15" "2019-12-16" "2019-12-17" "2019-12-19" "2019-12-20" "2019-12-18" "2019-12-22" "2019-12-21" "2019-12-23" "2019-12-24"
[41] "2019-12-25" "2019-12-26" "2019-12-28" "2019-12-29" "2019-12-31" "2019-12-27" "2019-12-30" "2020-01-01" "2020-01-03" "2020-01-02"
[51] "2020-01-04" "2020-01-05" "2020-01-06" "2020-09-09" "2020-01-07" "2020-01-08" "2020-01-09" "2020-01-10" "2020-01-11" "2020-01-12"
[61] "2020-01-13" "2020-01-14" "2020-01-16" "2020-01-15" "2020-01-17" "2020-01-18" "2020-01-19" "2020-01-21" "2020-01-22" "2020-01-23"
[71] "2020-01-24" "2020-01-20" "2020-01-25" "2020-01-26" "2020-01-28" "2020-01-27" "2020-01-29" "2020-01-31" "2020-01-30" "2020-02-02"
[81] "2020-02-01" "2020-02-03" "2020-02-05" "2020-02-06" "2020-02-07" "2020-02-04" "2020-02-08" "2020-02-09" "2020-02-10" "2020-02-11"
[91] "2020-02-12" "2020-02-13" "2020-08-18" "2020-02-14" "2020-02-16" "2020-02-17" "2020-02-18" "2020-02-19" "2020-02-20" "2020-02-15"
[101] "2020-02-21" "2020-02-22" "2020-02-23" "2020-02-24" "2020-02-26" "2020-02-25" "2020-09-25" "2020-02-27" "2020-02-29" "2020-03-01"
[111] "2020-02-28" "2020-03-02" "2020-03-03" "2020-03-04" "2020-03-05" "2020-03-06" "2020-03-07" "2020-03-08" "2020-03-09" "2020-03-10"
[121] "2020-08-15" "2020-03-11" "2020-03-13" "2020-03-14" "2020-03-12" "2020-03-15" "2020-03-16" "2020-03-17" "2020-03-18" "2020-03-19"
[131] "2020-03-20" "2020-03-22" "2020-08-08" "2020-03-23" "2020-03-24" "2020-03-25" "2020-03-21" "2020-03-26" "2020-03-28" "2020-03-27"
[141] "2020-03-29" "2020-03-30" "2020-03-31" "2020-04-01" "2020-04-02" "2020-04-03" "2020-04-04" "2020-04-05" "2020-04-06" "2020-04-07"
[151] "2020-04-08" "2020-04-10" "2020-04-09" "2020-04-11" "2020-04-12" "2020-04-13" "2020-04-14" "2020-04-15" "2020-04-16" "2020-04-17"
[161] "2020-04-18" "2020-04-19" "2020-04-20" "2020-04-21" "2020-04-23" "2020-04-22" "2020-04-24" "2020-04-26" "2020-04-25" "2020-04-27"
[171] "2020-04-28" "2020-04-29" "2020-04-30" "2020-05-01" "2020-05-02" "2020-05-04" "2020-05-05" "2020-05-03" "2020-05-07" "2020-05-10"
[181] "2020-05-08" "2020-05-06" "2020-05-13" "2020-05-14" "2020-05-11" "2020-05-09" "2020-05-15" "2020-05-16" "2020-05-17" "2020-05-12"
[191] "2020-05-18" "2020-05-19" "2020-05-20" "2020-05-21" "2020-05-22" "2020-05-23" "2020-07-27" "2020-05-24" "2020-05-25" "2020-05-26"
[201] "2020-05-27" "2020-05-28" "2020-05-29" "2020-05-30" "2020-05-31" "2020-06-01" "2020-06-02" "2020-06-03" "2020-06-04" "2020-06-05"
[211] "2020-06-06" "2020-06-07" "2020-06-08" "2020-06-09" "2020-06-10" "2020-06-11" "2020-06-12" "2020-06-13" "2020-06-14" "2020-06-15"
[221] "2020-06-16" "2020-06-17" "2020-06-18" "2020-06-19" "2020-06-20" "2020-06-21" "2020-06-22" "2020-06-23" "2020-06-24" "2020-06-25"
[231] "2020-07-31" "2020-07-03" "2020-06-28" "2020-08-25" "2020-07-01" "2020-07-13" "2020-08-22" "2020-07-10" "2020-07-15" "2020-07-22"
[241] "2020-07-19" "2020-07-18" "2020-07-25" "2020-07-28" "2020-08-06" "2020-06-30" "2020-07-09" "2020-07-12" "2020-07-16" "2020-07-04"
[251] "2020-08-03" "2020-07-08" "2020-08-09" "2020-08-12" "2020-07-30" "2020-07-05" "2020-07-06" "2020-07-07" "2020-07-02" "2020-06-26"
[261] "2020-06-29" "2020-06-27" "2020-09-05" "2020-07-11" "2020-10-19" "2020-09-01" "2020-08-28" "2020-10-01" "2020-08-02" "2020-07-24"
[271] "2020-09-27" "2020-09-13" "2020-08-14" "2020-09-26" "2020-09-18" "2020-09-22" "2020-07-21" "2020-10-10" "2020-10-06" "2020-10-15"
[281] "2020-08-23" "2020-09-14" "2020-08-26" "2020-07-20" "2020-10-29" "2020-10-24" "2020-07-26" "2020-08-29" "2020-09-10" "2020-07-14"
[291] "2020-10-02" "2020-08-19" "2020-08-17" "2020-08-16" "2020-07-23" "2020-08-30" "2020-07-17" "2020-08-05" "2020-07-29" "2020-10-11"
[301] "2020-08-07" "2020-10-20" "2020-08-04" "2020-08-01" "2020-08-13" "2020-09-02" "2020-08-20" "2020-08-10" "2020-08-11" "2020-09-19"
[311] "2020-09-06" "2020-09-23" "2020-08-27" "2020-10-28" "2020-08-21" "2020-10-25" "2020-09-08" "2020-09-04" "2020-09-17" "2020-08-24"
[321] "2020-10-05" "2020-08-31" "2020-09-21" "2020-09-03" "2020-10-07" "2020-09-15" "2020-09-11" "2020-09-07" "2020-10-14" "2020-10-23"
[331] "2020-10-16" "2020-09-28" "2020-10-18" "2020-10-03" "2020-09-20" "2020-09-24" "2020-09-12" "2020-10-12" "2020-10-30" "2020-09-16"
[341] "2020-10-21" "2020-10-08" "2020-10-09" "2020-09-29" "2020-10-04" "2020-10-27" "2020-10-17" "2020-10-13" "2020-10-26" "2020-09-30"
[351] "2020-10-22"
1ST) HOW RELATIVE HUMIDITY CHANGES THROUGH THE YEAR - EXPLORING THE DATA BY MONTH OVER THE YEAR
As the graph below shows, the different sensors follow the same pattern over the year, so we can combine all the sensors by taking their mean or median.
Only three days in the whole year had a daily average below 30%: 21/11/2019, 20/12/2019 and 30/01/2020.
# Monthly median of `value` per site for sensor "5b.EPA-1h", one line per site.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(site_id, month) %>%
  # na.rm = TRUE: `value` contains missing readings, and a single NA would
  # otherwise turn that group's median into NA and leave a gap in the line.
  summarise(median_RH = median(value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = month, y = median_RH, group = site_id, color = site_id)) +
  geom_line() +
  theme_ipsum()
# Monthly mean (blue) and median (red) of `value` for sensor "5b.EPA-1h",
# pooled across all sites.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month) %>%
  # na.rm = TRUE: `value` contains missing readings; without it one NA would
  # make the whole month's mean/median NA.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # `month` is a factor, so group = 1 is needed to join the points into a
  # single line across the discrete x axis.
  ggplot(aes(x = month, group = 1)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  theme_ipsum()
# Months whose mean or median `value` for sensor "5b.EPA-1h" is at least 70.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month) %>%
  # na.rm = TRUE: an NA summary would make the comparison below NA, and
  # filter() silently drops rows whose condition is NA.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(mean_RH >= 70 | median_RH >= 70)
The months from May to August are the worst months, with RH averages greater than 70%. November seems to be the month with the lowest RH average (but this dataset only contains data for the second half of November).
# Table of monthly mean and median `value` for sensor "5b.EPA-1h".
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month) %>%
  # na.rm = TRUE: ignore missing readings instead of reporting NA for a month.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  )
2ND) HOW RELATIVE HUMIDITY CHANGES THROUGH THE DAY - EXPLORING THE DATA BY HOUR OVER THE DAY.
# Mean `value` by hour of day for sensor "5b.EPA-1h", one line per site.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(site_id, hour) %>%
  # na.rm = TRUE: a single missing reading would otherwise make that
  # site/hour mean NA and leave a gap in the line.
  summarise(mean_RH = mean(value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = hour, y = mean_RH, group = site_id, color = site_id)) +
  geom_line() +
  theme_ipsum() +
  # One axis tick per hour, 0 through 23.
  scale_x_continuous(breaks = seq(0, 23, 1))
# Mean (blue) and median (red) `value` by hour of day for sensor "5b.EPA-1h",
# pooled across sites, with dashed reference lines at 70 and 30.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(hour) %>%
  # na.rm = TRUE: ignore missing readings instead of yielding NA for an hour.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = hour)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  # geom_hline() is the dedicated geom for constant reference lines; it does
  # not map a constant through aes() once per data row.
  geom_hline(yintercept = 70, color = "black", linetype = "dashed") +
  geom_hline(yintercept = 30, color = "purple", linetype = "dashed") +
  theme_ipsum() +
  # One axis tick per hour, 0 through 23.
  scale_x_continuous(breaks = seq(0, 23, 1))
library(plotly)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
# Hour-of-day profile of mean (blue) and median (red) `value` for sensor
# "5b.EPA-1h", one panel per month, with dotted reference lines at 70 and 30.
facet1 <- sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month, hour) %>%
  # na.rm = TRUE: ignore missing readings instead of yielding NA for a
  # month/hour cell.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = hour)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 70, color = "black", linetype = "dotted") +
  geom_hline(yintercept = 30, color = "purple", linetype = "dotted") +
  facet_wrap(vars(month)) +
  scale_x_continuous(breaks = seq(0, 23, 1)) +
  # Small, rotated tick labels so 24 hourly ticks fit in each panel.
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 7))

# Render the faceted plot as an interactive plotly widget without the mode bar.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
ggplotly(facet1) %>%
  config(displayModeBar = FALSE)
`group_by_()` is deprecated as of dplyr 0.7.0.
Please use `group_by()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
# Month/hour combinations whose mean or median `value` for sensor "5b.EPA-1h"
# is at most 30.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month, hour) %>%
  # na.rm = TRUE: an NA summary would make the comparison below NA, and
  # filter() silently drops rows whose condition is NA.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(mean_RH <= 30 | median_RH <= 30)
# Month-by-month profile of mean (blue) and median (red) `value` for sensor
# "5b.EPA-1h", one panel per hour of day, with dashed reference lines at 70
# and 30.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(hour, month) %>%
  # na.rm = TRUE: ignore missing readings instead of yielding NA for an
  # hour/month cell.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # `month` is a factor, so group = 1 is needed to join the points into a
  # single line within each panel.
  ggplot(aes(x = month, group = 1)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 70, color = "black", linetype = "dashed", size = 0.5) +
  geom_hline(yintercept = 30, color = "purple", linetype = "dashed", size = 0.5) +
  # Small, rotated tick labels so the month names fit in each panel.
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 7)) +
  facet_wrap(vars(hour))
# Mean `value` of sensor "5b" per `hourMin2` time-of-day bucket, missing
# readings excluded, drawn as a single blue line on a time axis.
# NOTE(review): `hourMin2` is not among the columns listed by the NA check
# earlier in this notebook; presumably it is derived in a step not shown
# here - confirm.
sensor_readingsRH %>%
  filter(sensor_id == "5b", !is.na(value)) %>%
  group_by(hourMin2) %>%
  summarise(mean_RH = mean(value), .groups = "drop") %>%
  ggplot(aes(x = hourMin2, y = mean_RH)) +
  geom_line(color = "blue") +
  theme_ipsum() +
  scale_x_time()